only ILC2 selected
(without the upper left CC part)
(which could be mixed with other celltypes like stromal)
source("../../../analysis.10x.r")
GEX.seur.raw <- readRDS("../analysis_1101/sc10x.GEX.seur.pre1105.rds")
GEX.seur.raw
## An object of class Seurat
## 16896 features across 18482 samples within 1 assay
## Active assay: RNA (16896 features, 1500 variable features)
## 3 dimensional reductions calculated: pca, tsne, umap
DimPlot(GEX.seur.raw, reduction = "umap", group.by = "seurat_clusters",label = T, repel = F, label.size = 3.5) +
DimPlot(GEX.seur.raw, reduction = "umap", group.by = "preAnno_2",label = T, repel = F, label.size = 3.2)
only cluster_1,3,4,5 selected
DimPlot(GEX.seur.raw, reduction = "umap", group.by = "seurat_clusters",label = T, repel = F, label.size = 3.5) +
FeaturePlot(GEX.seur.raw, reduction = "umap", features = "percent.mt")
if could exclude those little group of ILC2 cells with high mito and low nFeature
DimPlot(GEX.seur.raw, reduction = "umap", group.by = "seurat_clusters",label = T, repel = F, label.size = 3.5) +
FeaturePlot(subset(GEX.seur.raw, subset=percent.mt <7.5)
, reduction = "umap", features = "percent.mt")
FeaturePlot(GEX.seur.raw, reduction = "umap", features = c("nFeature_RNA","nCount_RNA"))
FeaturePlot(subset(GEX.seur.raw, subset= nCount_RNA < 25000 & nFeature_RNA < 4500 & nFeature_RNA > 1000 & percent.mt < 7.5),
reduction = "umap", features = c("nFeature_RNA","nCount_RNA"))
GEX.seur <- subset(GEX.seur.raw, subset = seurat_clusters %in% c(1,3,4,5))
GEX.seur <- subset(GEX.seur, subset= nCount_RNA < 16000 & nFeature_RNA < 3600 & nFeature_RNA > 1000 & percent.mt < 7.5)
GEX.seur <- subset(GEX.seur, features = rownames(GEX.seur)[rowSums(GEX.seur@assays[['RNA']]@data > 0)>=3])
GEX.seur
## An object of class Seurat
## 15083 features across 9076 samples within 1 assay
## Active assay: RNA (15083 features, 1258 variable features)
## 3 dimensional reductions calculated: pca, tsne, umap
quantile(GEX.seur$nFeature_RNA,prob=seq(0,1,0.025))
## 0% 2.5% 5% 7.5% 10% 12.5% 15% 17.5%
## 1001.000 1091.000 1159.000 1209.000 1244.000 1277.000 1307.000 1336.000
## 20% 22.5% 25% 27.5% 30% 32.5% 35% 37.5%
## 1362.000 1384.000 1405.000 1429.000 1453.000 1475.000 1497.250 1522.000
## 40% 42.5% 45% 47.5% 50% 52.5% 55% 57.5%
## 1543.000 1566.000 1586.000 1608.000 1631.000 1656.000 1677.250 1701.000
## 60% 62.5% 65% 67.5% 70% 72.5% 75% 77.5%
## 1726.000 1754.875 1784.000 1819.000 1851.000 1890.375 1932.250 1980.125
## 80% 82.5% 85% 87.5% 90% 92.5% 95% 97.5%
## 2028.000 2088.000 2151.750 2235.000 2329.500 2461.000 2618.250 2861.000
## 100%
## 3591.000
DimPlot(GEX.seur, reduction = "umap", group.by = "preAnno_1",label = F, repel = F, label.size = 3.5) +
DimPlot(GEX.seur, reduction = "umap", group.by = "preAnno_2",label = F, repel = F, label.size = 3.2)
DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.1",label = F, repel = F, label.size = 3.5) +
FeaturePlot(GEX.seur, reduction = "umap", features = "percent.mt")
plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by = "orig.ident") +
coord_cartesian(xlim =c(0, 30000), ylim = c(0, 10))
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "orig.ident") +
coord_cartesian(xlim =c(0, 30000), ylim = c(0, 5000))
plota + plotb
FeaturePlot(GEX.seur, reduction = "umap", features = c("nFeature_RNA","nCount_RNA"))
FeaturePlot(subset(GEX.seur, subset= nCount_RNA < 16000 & nFeature_RNA < 3200 & nFeature_RNA > 1000 & percent.mt < 7.5),
reduction = "umap", features = c("nFeature_RNA","nCount_RNA"))
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0.1, group.by = "SP.info")
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0.1, group.by = "FB.info")
DimPlot(GEX.seur, group.by = "FB.info", split.by = "FB.info", ncol = 4,
cols = c("#05BE78","#5ECDF2","#739BDD","#B7E16F","#87C0C9","#7756FA","#9855B9","#6F9920"))
DimPlot(GEX.seur, group.by = "SP.info", split.by = "SP.info", ncol = 3)
genes_to_check <- c("Fcer1g","Klrb1c","Ncr1","Il22",
"Ccr6","Tbx21","Ifng","Eomes",
"Gata3","Il1rl1","Il13","Il17a",
"Il4","Il5","Klrg1","Rorc",
"Il7r","Thy1","Kdm6b","Ptprc",
"Cd3d","Cd3g","Cd4",
#"Cd8a",
"Trac","Trdc","Epcam","Pecam1","Siglech",
"Hist1h1b","Top2a","Mcm6","Mki67")
FeaturePlot(GEX.seur, features = genes_to_check, ncol = 4)
# remove unwanted metadata
GEX.seur@meta.data <- GEX.seur@meta.data[,c(1:8,12,14,15:17)]
head(GEX.seur@meta.data)
## orig.ident nCount_RNA nFeature_RNA percent.mt FB.info
## AAACCCAAGCATCCTA-1 ILC2_GEX 13805 3251 3.940601 hashtag6
## AAACCCAAGGGCAACT-1 ILC2_GEX 5200 2039 3.518554 hashtag5
## AAACCCAAGGTGCCAA-1 ILC2_GEX 4848 1503 3.361518 hashtag1
## AAACCCACAGAAGTGC-1 ILC2_GEX 5665 1673 4.165931 hashtag3
## AAACCCAGTAGGGTAC-1 ILC2_GEX 3458 1574 1.098583 hashtag5
## AAACCCATCGGTCGAC-1 ILC2_GEX 3298 1169 4.093390 hashtag6
## S.Score G2M.Score Phase DoubletFinder0.05
## AAACCCAAGCATCCTA-1 -0.08767960 -0.3449294 G1 Singlet
## AAACCCAAGGGCAACT-1 -0.04374438 -0.1981341 G1 Singlet
## AAACCCAAGGTGCCAA-1 -0.03750102 -0.1720957 G1 Singlet
## AAACCCACAGAAGTGC-1 -0.11390714 -0.2737016 G1 Singlet
## AAACCCAGTAGGGTAC-1 -0.03319338 -0.1208448 G1 Singlet
## AAACCCATCGGTCGAC-1 -0.05704899 -0.2030185 G1 Singlet
## DoubletFinder0.1 preAnno_1 preAnno_2 SP.info
## AAACCCAAGCATCCTA-1 Singlet ILC2 ILC2_4 IL-33_DA
## AAACCCAAGGGCAACT-1 Singlet ILC2 ILC2_3 IL-33
## AAACCCAAGGTGCCAA-1 Singlet ILC2 ILC2_3 PBS
## AAACCCACAGAAGTGC-1 Singlet ILC2 ILC2_1 IL-33
## AAACCCAGTAGGGTAC-1 Singlet ILC2 ILC2_2 IL-33
## AAACCCATCGGTCGAC-1 Singlet ILC2 ILC2_3 IL-33_DA
plota <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "percent.mt", group.by = "orig.ident") +
coord_cartesian(xlim =c(0, 20000), ylim = c(0, 10))
plotb <- FeatureScatter(GEX.seur, feature1 = "nCount_RNA", feature2 = "nFeature_RNA", group.by = "orig.ident") +
coord_cartesian(xlim =c(0, 20000), ylim = c(0, 4000))
plota + plotb
GEX.seur <- FindVariableFeatures(GEX.seur, selection.method = "vst", nfeatures = 800)
# Identify the 10 most highly variable genes
#top10 <- head(VariableFeatures(GEX.seur), 10)
top40 <- head(VariableFeatures(GEX.seur), 40)
# plot variable features with and without labels
plot1 <- VariableFeaturePlot(GEX.seur)
plot2 <- LabelPoints(plot = plot1, points = top40, repel = T)
plot1 + plot2
top40
## [1] "Cxcl1" "Il5" "Cxcl2" "Il13" "Ptgs2" "Cxcl10"
## [7] "Hspa1a" "Ccl1" "Areg" "Cxcl3" "Il17a" "Rrad"
## [13] "Serpine1" "Csf2" "Mt1" "Il1r2" "Akap12" "Tnfsf8"
## [19] "Cd83" "Il2" "Gm10827" "Hdc" "Ccr7" "Gm43305"
## [25] "Gm12840" "Penk" "Bambi" "Atf3" "Calca" "Hspa1b"
## [31] "Ier3" "Tmem80" "Plac8" "Klf2" "Il17f" "Terf2ip"
## [37] "Il4" "Tnfrsf4" "Gem" "AA467197"
GEX.seur <- ScaleData(GEX.seur, features = rownames(GEX.seur))
## Centering and scaling data matrix
# exclude MT genes
MT_gene <- grep("^mt-",rownames(GEX.seur),value = T)
Ribo_gene <- grep("^Rpl|^Rps", rownames(GEX.seur),value = T)
GEX.seur <- RunPCA(GEX.seur, features = setdiff(VariableFeatures(object = GEX.seur),c(MT_gene,Ribo_gene)))
## PC_ 1
## Positive: Txnip, Gm42418, Tmsb4x, Sh3bgrl3, Mns1, S100a6, S100a4, S100a10, Lgals1, Cd69
## Vim, S100a11, Ahnak, Actn2, Capg, Cd3g, Cd52, Tnf, Crip1, Serpina3g
## Csf1, Cflar, Gm20627, Serpinb6a, Stc2, Malat1, Tsc22d3, Dleu2, Filip1l, Lgals3
## Negative: Kdm6b, Rora, Hilpda, Rel, Sub1, Samsn1, Nfkb1, Nr4a3, Junb, Srgn
## Areg, Pim1, Nabp1, Crem, Nfkbia, Ramp3, Rgs2, Odc1, Dot1l, Ifrd1
## Zfp36l1, Dennd4a, Mpp7, Nr4a1, Dusp5, Gata3, Il5, Cdkn1a, H3f3b, Isy1
## PC_ 2
## Positive: Malat1, Lmo4, Neurl3, Gm20186, Fosb, Gm42418, Hes1, Vps37b, Calca, Nr4a1
## Satb1, Kctd12, Cebpb, Hs3st1, Prdx6, Zfp36, Zfp36l1, Junb, Tiparp, Neb
## Bmp2, Gimap6, Ccr9, Jund, Tcrg-C1, Nr4a2, Btg2, Eprs, Cd24a, Fos
## Negative: Lgals1, Cd52, S100a6, Crip1, S100a11, S100a4, Vim, S100a10, Tmsb4x, Sh3bgrl3
## Actb, Ahnak, Capg, Anxa2, Cd83, Serpinb6a, Arg1, Irf4, Lgals3, Tnfsf8
## Zeb2, Myadm, Il18rap, Ttc39c, Dusp4, Bcl2a1d, Pcsk1, Tnfsf4, Alcam, Csf2
## PC_ 3
## Positive: Ramp3, Il5, Ctla2a, Tnfrsf9, Igsf5, Gm42418, Calca, Ell2, Srgn, Errfi1
## Bcl2a1d, Raph1, Ccl1, Crem, Isy1, Cd3g, Lmo4, Fth1, Il13, Tent5a
## Slc18a2, Palld, Hs3st1, Pdcd1, Irs2, Hif1a, Traf4, Rel, Rab27b, Zfyve16
## Negative: Zfp36, Jun, Dusp1, Fos, Hspa1b, Junb, Ltb, Ier2, Nfkbia, Hspa1a
## Txnip, Klf6, Hsp90aa1, Fosb, Nr4a1, Gpr183, Dnajb1, Cxcr4, Klf2, Gm17056
## Dnaja1, Hes1, Bmp2, Phlda1, Rhob, Lztfl1, Mns1, Btg2, Tcrg-C4, Rgs2
## PC_ 4
## Positive: Gadd45b, Bhlhe40, Ccl1, Cd69, Fosb, Spry1, Srgn, Egr3, Pim1, Nr4a1
## Uhrf2, Csf2, Nfkbia, Il2, Map3k8, Nr4a2, Cxcl2, Plk3, Tnfsf11, Rora
## Malt1, Lif, Rgcc, Areg, Dennd4a, Sla, Mir155hg, Ifrd1, Tob2, Arg1
## Negative: Dnaja1, Txnip, Hsp90aa1, Tnfrsf9, Hspa1b, Id2, Hspa1a, Pcyt1a, Dnajb1, Isy1
## Gata3, Icos, Dhx40, Tent5a, Ly6a, Socs1, D16Ertd472e, Crem, Ddit4, Plin2
## Hist1h1e, Plaur, Kcnq1ot1, Cebpb, Kdm2b, Srxn1, Abcb1b, Dgat2, Dgat1, Cwc25
## PC_ 5
## Positive: Cd3g, Fosb, Ctla2a, Klf6, Ipmk, Socs2, Gm26802, Fos, Hspa1b, Jun
## Ahnak, Lmna, Hs3st1, Rab27b, Rhob, Hist1h2bc, Dusp1, Ythdc1, Jund, Enah
## Txnip, Hsp90aa1, Hspa1a, Wwtr1, Dnajb1, Egr1, Tsc22d3, Gpr171, Stc2, Malat1
## Negative: Stab2, Cd83, Pxdc1, Icos, Gem, Gpr183, Tmsb4x, Ankrd33b, Lztfl1, Gm45716
## Bmp2, B3gnt2, Gadd45b, Igfbp4, Ikzf3, Ccr7, Tnfsf4, Ier5l, Arl5c, Cd70
## Gm43305, Ltb, Sema6d, Ccr4, Pim1, Nr4a1, Pde4b, Tnfsf9, Dgat2, Cxcr6
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "preAnno_1") +
DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "preAnno_1")
DimPlot(GEX.seur, reduction = "pca",dims = 1:2, group.by = "preAnno_2") +
DimPlot(GEX.seur, reduction = "pca",dims = 3:4, group.by = "preAnno_2")
DimHeatmap(GEX.seur, dims = 1:24, cells = 1500, balanced = TRUE,ncol = 4)
ElbowPlot(GEX.seur,ndims = 24)
would take PCs 1-15 for next
PCs <- 1:15
GEX.seur <- FindNeighbors(GEX.seur, dims = PCs,k.param = 80)
## Computing nearest neighbor graph
## Computing SNN
GEX.seur <- FindClusters(GEX.seur, resolution = 0.6, method = 'igraph')
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 9076
## Number of edges: 1371934
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7100
## Number of communities: 6
## Elapsed time: 5 seconds
#GEX.seur <- RunTSNE(GEX.seur, dims=PCs, max_iter = 2000)
#GEX.seur <- RunUMAP(GEX.seur, dims=PCs, n.neighbors = 80)
DimPlot(GEX.seur, reduction = "tsne", label = T) + DimPlot(GEX.seur, reduction = "umap", label = T)
DimPlot(GEX.seur, reduction = "umap", group.by = "DoubletFinder0.1",label = F, repel = F, label.size = 3.5) +
FeaturePlot(GEX.seur, reduction = "umap", features = "percent.mt")
FeaturePlot(GEX.seur, reduction = "umap", features = c("nFeature_RNA","nCount_RNA"))
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0.1, group.by = "SP.info")
VlnPlot(GEX.seur, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0.1, group.by = "FB.info")
DimPlot(GEX.seur, group.by = "FB.info", split.by = "FB.info", ncol = 4,
cols = c("#05BE78","#5ECDF2","#739BDD","#B7E16F","#87C0C9","#7756FA","#9855B9","#6F9920"))
DimPlot(GEX.seur, group.by = "SP.info", split.by = "SP.info", ncol = 3)
DotPlot(GEX.seur, features = genes_to_check[genes_to_check %in% rownames(GEX.seur)],
assay='RNA' , group.by = "seurat_clusters" ) + coord_flip()
genes_to_check <- c("Fcer1g","Klrb1c","Ncr1","Il22",
"Ccr6","Tbx21","Ifng","Eomes",
"Gata3","Il1rl1","Il13","Il17a",
"Il4","Il5","Klrg1","Rorc",
"Il7r","Thy1","Kdm6b","Ptprc",
"Cd3d","Cd3g","Cd4",
#"Cd8a",
"Trac","Trdc","Epcam","Pecam1","Siglech",
"Hist1h1b","Top2a","Mcm6","Mki67")
FeaturePlot(GEX.seur, features = genes_to_check, ncol = 4)
#
genes_nature2017 <- c("Tbx21","Ifng","Ccl5","Il12rb1",
"Gata3","Il1rl1","Il5","Il13",
"Rorc","Il17a","Ahr","Il23r")
DotPlot(GEX.seur, features = genes_nature2017[genes_nature2017 %in% rownames(GEX.seur)],
assay='RNA' , group.by = "seurat_clusters" ) + coord_flip()
FeaturePlot(GEX.seur, features = genes_nature2017, ncol = 4)
# find markers for every cluster compared to all remaining cells, report only the positive ones
#GEX.markers.ILC2_adv <- FindAllMarkers(GEX.seur, only.pos = TRUE, min.pct = 0.1, logfc.threshold = 0.1,
# test.use = "MAST")
GEX.markers.ILC2_adv <- read.table("GEX.markers.ILC2_adv.1110.csv", header = TRUE, sep = ",")
markers.top16 <- GEX.markers.ILC2_adv %>% group_by(cluster) %>% filter(avg_log2FC>0.25) %>% top_n(n = 16, wt = avg_log2FC)
markers.top16
## # A tibble: 96 x 7
## # Groups: cluster [6]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## <dbl> <dbl> <dbl> <dbl> <dbl> <int> <chr>
## 1 6.78e-71 0.333 1 1 1.02e-66 0 Gm42418
## 2 3.40e-69 0.438 0.635 0.570 5.13e-65 0 Gimap3
## 3 3.54e-68 0.354 0.438 0.381 5.33e-64 0 Crebzf
## 4 1.05e-63 0.390 0.35 0.268 1.59e-59 0 Fam122a
## 5 9.60e-59 0.295 0.706 0.711 1.45e-54 0 Tra2a
## 6 4.90e-57 0.260 0.891 0.905 7.39e-53 0 Rps27rt
## 7 1.06e-56 0.276 0.439 0.412 1.60e-52 0 Snhg12
## 8 1.38e-46 0.301 0.713 0.692 2.09e-42 0 Bcl2
## 9 3.99e-46 0.271 0.71 0.7 6.01e-42 0 Cnot6l
## 10 1.31e-45 0.282 0.406 0.359 1.97e-41 0 Herc4
## # ... with 86 more rows
#GEX.markers.ILC2_pre %>% group_by(cluster) %>% filter(pct.1 > 0.2 & pct.2 <0.1) %>% top_n(n = 8, wt = avg_log2FC)
#GEX.markers.ILC2_pre %>% group_by(cluster) %>% top_n(n = 80, wt = avg_log2FC) %>% filter(gene %in% c("Top2a","Mki67") )
#
DotPlot(GEX.seur, features = markers.top16[!duplicated(markers.top16$gene),"gene"],
assay='RNA' , group.by = "seurat_clusters" ) + coord_flip()
FeaturePlot(GEX.seur, features = markers.top16$gene[1:96],
reduction = 'umap')
VlnPlot(GEX.seur, features = markers.top16$gene[1:96],
group.by = "seurat_clusters")
GEX.seur$Anno_ILC2 <- factor(paste0("ILC2_",as.character(GEX.seur$seurat_clusters)),
levels = paste0("ILC2_",c(0,4,1,5,2,3)))
marker.new <- c((markers.top16 %>% filter(cluster==0))$gene,
(markers.top16 %>% filter(cluster==4))$gene,
(markers.top16 %>% filter(cluster==1))$gene,
(markers.top16 %>% filter(cluster==5))$gene,
(markers.top16 %>% filter(cluster==2))$gene,
(markers.top16 %>% filter(cluster==3))$gene)
p.heat_top16 <- DoHeatmap(GEX.seur , group.by = "Anno_ILC2",
features = marker.new) +
NoLegend() + labs(title = "ILC2_newclusters top16\n")
p.heat_top16
marker.top80 <- c((GEX.markers.ILC2_adv %>% group_by(cluster) %>% filter(avg_log2FC >0.25) %>% top_n(n = 80, wt = avg_log2FC) %>% filter(cluster==0))$gene,
(GEX.markers.ILC2_adv %>% group_by(cluster) %>% filter(avg_log2FC >0.25) %>% top_n(n = 80, wt = avg_log2FC) %>% filter(cluster==4))$gene,
(GEX.markers.ILC2_adv %>% group_by(cluster) %>% filter(avg_log2FC >0.25) %>% top_n(n = 80, wt = avg_log2FC) %>% filter(cluster==1))$gene,
(GEX.markers.ILC2_adv %>% group_by(cluster) %>% filter(avg_log2FC >0.25) %>% top_n(n = 80, wt = avg_log2FC) %>% filter(cluster==5))$gene,
(GEX.markers.ILC2_adv %>% group_by(cluster) %>% filter(avg_log2FC >0.25) %>% top_n(n = 80, wt = avg_log2FC) %>% filter(cluster==2))$gene,
(GEX.markers.ILC2_adv %>% group_by(cluster) %>% filter(avg_log2FC >0.25) %>% top_n(n = 80, wt = avg_log2FC) %>% filter(cluster==3))$gene)
p.heat_top80 <- DoHeatmap(GEX.seur , group.by = "Anno_ILC2",
features = marker.top80) +
NoLegend() + labs(title = "ILC2_newclusters top80\n")+ theme( axis.text.y = element_text( size = 4 ))
p.heat_top80
lapply(GEX.DEGs_adv,function(x) x %>% group_by(cluster) %>% top_n(n = 8, wt = avg_log2FC))
## $ILC2_0
## # A tibble: 18 x 8
## # Groups: cluster [3]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene Anno_ILC2
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr> <chr>
## 1 6.16e-14 0.530 0.49 0.376 9.29e-10 PBS Gm47283 ILC2_0
## 2 1.64e-11 0.403 0.804 0.702 2.48e- 7 PBS Rbm3 ILC2_0
## 3 7.68e- 9 0.329 0.947 0.878 1.16e- 4 PBS S100a4 ILC2_0
## 4 1.94e- 7 0.344 0.921 0.871 2.93e- 3 PBS Nrip1 ILC2_0
## 5 3.09e- 6 0.316 0.686 0.626 4.66e- 2 PBS Gimap3 ILC2_0
## 6 2.29e- 5 0.335 0.548 0.441 3.45e- 1 PBS Nup98 ILC2_0
## 7 1.17e- 4 0.377 0.472 0.378 1.00e+ 0 PBS Arg1 ILC2_0
## 8 9.23e- 3 0.312 0.496 0.437 1.00e+ 0 PBS Tnfsf11 ILC2_0
## 9 2.19e-14 0.277 0.998 0.992 3.30e-10 IL-33 Hsp90ab1 ILC2_0
## 10 1.38e- 6 0.268 0.533 0.429 2.08e- 2 IL-33 Ltb4r1 ILC2_0
## 11 2.47e- 6 0.262 0.826 0.763 3.72e- 2 IL-33 Tnfrsf18 ILC2_0
## 12 4.48e- 4 0.278 0.669 0.589 1.00e+ 0 IL-33 Calca ILC2_0
## 13 8.85e- 4 0.253 0.612 0.548 1.00e+ 0 IL-33 Hsp90aa1 ILC2_0
## 14 3.87e- 8 0.376 0.546 0.433 5.84e- 4 IL-33_DA Gm43305 ILC2_0
## 15 6.54e- 7 0.350 0.536 0.492 9.87e- 3 IL-33_DA Odc1 ILC2_0
## 16 7.48e- 5 0.254 0.502 0.443 1.00e+ 0 IL-33_DA St8sia4 ILC2_0
## 17 9.94e- 5 0.308 0.586 0.528 1.00e+ 0 IL-33_DA Nr4a2 ILC2_0
## 18 3.98e- 4 0.324 0.742 0.702 1.00e+ 0 IL-33_DA AY036118 ILC2_0
##
## $ILC2_4
## # A tibble: 24 x 8
## # Groups: cluster [3]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene Anno_ILC2
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr> <chr>
## 1 6.31e-12 0.672 0.667 0.422 0.0000000952 PBS Gm47283 ILC2_4
## 2 3.63e- 5 0.676 0.456 0.344 0.548 PBS Mxd1 ILC2_4
## 3 3.48e- 4 0.450 0.825 0.798 1 PBS Nrip1 ILC2_4
## 4 4.21e- 4 1.35 0.246 0.166 1 PBS Atf3 ILC2_4
## 5 5.09e- 4 0.520 0.667 0.556 1 PBS Cebpb ILC2_4
## 6 6.25e- 4 0.500 0.316 0.242 1 PBS Snhg15 ILC2_4
## 7 3.04e- 3 0.547 0.228 0.178 1 PBS Ccl4 ILC2_4
## 8 6.55e- 3 0.641 0.588 0.481 1 PBS Hes1 ILC2_4
## 9 4.21e- 6 0.551 0.431 0.296 0.0635 IL-33 Egr1 ILC2_4
## 10 5.18e- 6 0.304 0.74 0.65 0.0781 IL-33 Ltb4r1 ILC2_4
## # ... with 14 more rows
##
## $ILC2_1
## # A tibble: 22 x 8
## # Groups: cluster [3]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene Anno_ILC2
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr> <chr>
## 1 3.03e-15 0.681 0.585 0.408 4.57e-11 PBS Gm47283 ILC2_1
## 2 6.60e- 8 0.504 0.88 0.819 9.96e- 4 PBS Lmo4 ILC2_1
## 3 6.65e- 8 0.526 0.76 0.648 1.00e- 3 PBS Neurl3 ILC2_1
## 4 4.15e- 7 0.375 0.975 0.984 6.26e- 3 PBS Junb ILC2_1
## 5 2.53e- 6 0.395 0.69 0.644 3.82e- 2 PBS Ptpn13 ILC2_1
## 6 4.10e- 6 0.381 0.97 0.945 6.18e- 2 PBS Zfp36l2 ILC2_1
## 7 8.46e- 6 0.389 0.81 0.746 1.28e- 1 PBS Vps37b ILC2_1
## 8 1.85e- 3 0.383 0.245 0.212 1.00e+ 0 PBS Hist1h1c ILC2_1
## 9 5.34e- 9 0.424 0.392 0.263 8.06e- 5 IL-33 Cd3g ILC2_1
## 10 1.57e- 8 0.251 0.7 0.569 2.37e- 4 IL-33 Hsp90aa1 ILC2_1
## # ... with 12 more rows
##
## $ILC2_5
## # A tibble: 24 x 8
## # Groups: cluster [3]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene Anno_ILC2
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr> <chr>
## 1 6.96e- 9 0.732 0.581 0.688 1.05e- 4 PBS Stat4 ILC2_5
## 2 3.39e- 7 0.818 0.452 0.17 5.11e- 3 PBS Hexim1 ILC2_5
## 3 7.16e- 5 0.817 0.581 0.548 1.00e+ 0 PBS Spry1 ILC2_5
## 4 1.21e- 4 0.701 0.903 0.868 1.00e+ 0 PBS Itgav ILC2_5
## 5 1.06e- 3 1.07 0.839 0.609 1.00e+ 0 PBS Ramp3 ILC2_5
## 6 1.20e- 3 1.02 0.903 0.69 1.00e+ 0 PBS Cxcl2 ILC2_5
## 7 1.60e- 3 0.759 0.903 0.8 1.00e+ 0 PBS Klf6 ILC2_5
## 8 3.14e- 3 0.779 0.903 0.933 1.00e+ 0 PBS Fos ILC2_5
## 9 5.71e-16 0.404 0.982 0.961 8.61e-12 IL-33 Coro1a ILC2_5
## 10 2.24e-14 0.474 0.706 0.437 3.38e-10 IL-33 Mns1 ILC2_5
## # ... with 14 more rows
##
## $ILC2_2
## # A tibble: 19 x 8
## # Groups: cluster [3]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene Anno_ILC2
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr> <chr>
## 1 7.42e- 9 0.494 0.562 0.373 0.000112 PBS Gm47283 ILC2_2
## 2 2.17e- 6 0.332 0.857 0.767 0.0327 PBS Rbm3 ILC2_2
## 3 4.64e- 6 0.382 0.906 0.869 0.0700 PBS Zfp36l2 ILC2_2
## 4 1.04e- 5 0.329 0.888 0.875 0.157 PBS S100a4 ILC2_2
## 5 5.32e- 5 0.365 0.79 0.71 0.802 PBS Hsp90aa1 ILC2_2
## 6 8.28e- 5 0.336 0.513 0.455 1 PBS Zcchc10 ILC2_2
## 7 4.47e- 3 0.348 0.692 0.593 1 PBS Hes1 ILC2_2
## 8 7.13e- 3 0.357 0.201 0.174 1 PBS Dgat2 ILC2_2
## 9 8.22e- 5 0.371 0.751 0.679 1 IL-33 Csf2 ILC2_2
## 10 5.26e- 4 0.361 0.654 0.567 1 IL-33 Cxcl2 ILC2_2
## 11 3.33e- 3 0.311 0.411 0.33 1 IL-33 Ccrl2 ILC2_2
## 12 2.88e-10 0.402 0.671 0.515 0.00000434 IL-33_DA Gm43305 ILC2_2
## 13 9.04e- 6 0.275 0.411 0.344 0.136 IL-33_DA Jarid2 ILC2_2
## 14 1.71e- 5 0.296 0.42 0.328 0.258 IL-33_DA Cd27 ILC2_2
## 15 2.36e- 5 0.265 0.17 0.106 0.356 IL-33_DA Epcam ILC2_2
## 16 1.86e- 4 0.270 0.687 0.665 1 IL-33_DA Icos ILC2_2
## 17 3.81e- 4 0.280 0.705 0.63 1 IL-33_DA Ramp3 ILC2_2
## 18 6.05e- 4 0.285 0.728 0.711 1 IL-33_DA AY036118 ILC2_2
## 19 8.81e- 4 0.250 0.765 0.767 1 IL-33_DA Odc1 ILC2_2
##
## $ILC2_3
## # A tibble: 24 x 8
## # Groups: cluster [3]
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene Anno_ILC2
## <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <chr> <chr>
## 1 3.58e- 7 0.472 0.604 0.48 0.00540 PBS Gm47283 ILC2_3
## 2 8.70e- 6 0.413 0.732 0.69 0.131 PBS 4932438A13Rik ILC2_3
## 3 2.47e- 5 0.351 0.47 0.363 0.372 PBS Gm2000 ILC2_3
## 4 5.36e- 4 0.409 0.336 0.24 1 PBS Lpar6 ILC2_3
## 5 5.86e- 4 0.376 0.356 0.256 1 PBS Tuba4a ILC2_3
## 6 4.03e- 3 0.431 0.946 0.912 1 PBS Eprs ILC2_3
## 7 6.11e- 3 0.333 0.43 0.37 1 PBS Mast4 ILC2_3
## 8 9.96e- 3 0.336 0.826 0.763 1 PBS Cebpb ILC2_3
## 9 1.31e-13 0.452 0.754 0.552 0.00000000197 IL-33 Ltb4r1 ILC2_3
## 10 3.74e- 6 0.312 0.69 0.554 0.0563 IL-33 Sh2d2a ILC2_3
## # ... with 14 more rows
lapply(GEX.DEGs_adv,dim)
## $ILC2_0
## [1] 23 8
##
## $ILC2_4
## [1] 75 8
##
## $ILC2_1
## [1] 68 8
##
## $ILC2_5
## [1] 421 8
##
## $ILC2_2
## [1] 29 8
##
## $ILC2_3
## [1] 59 8
vln_DEGs.adv <- list()
Idents(GEX.seur) <- "SP.info"
for(nn in names_adv){
pp.test <- VlnPlot(subset(GEX.seur, subset = Anno_ILC2 == nn),
features = (GEX.DEGs_adv[[nn]] %>% group_by(cluster) %>% top_n(n = 8, wt = avg_log2FC))$gene,
combine = F)
title <- cowplot::ggdraw() + cowplot::draw_label(nn, fontface = 'bold', size = 9)
for(pp in 1:length(pp.test)){
pp.test[[pp]] <- pp.test[[pp]] + NoLegend()
}
vln_DEGs.adv[[nn]] <- cowplot::plot_grid(title = title,plotlist = pp.test, ncol = 4)
}
Idents(GEX.seur) <- "Anno_ILC2"
## $ILC2_0
##
## $ILC2_4
##
## $ILC2_1
##
## $ILC2_5
##
## $ILC2_2
##
## $ILC2_3
Idents(GEX.seur) <- "Anno_ILC2"
DimPlot(GEX.seur, reduction = "umap", label = F,group.by = "FB.info",
cols = c("#05BE78","#5ECDF2","#739BDD","#B7E16F","#87C0C9","#7756FA","#9855B9","#6F9920"))+
DimPlot(GEX.seur, reduction = "umap", label = T)
DimPlot(GEX.seur, group.by = "FB.info", split.by = "FB.info", ncol = 4,
cols = c("#05BE78","#5ECDF2","#739BDD","#B7E16F","#87C0C9","#7756FA","#9855B9","#6F9920"))
table(data.frame(GEX.seur$FB.info,GEX.seur$Anno_ILC2))
## GEX.seur.Anno_ILC2
## GEX.seur.FB.info ILC2_0 ILC2_4 ILC2_1 ILC2_5 ILC2_2 ILC2_3
## hashtag1 101 30 26 8 82 64
## hashtag2 240 84 174 23 142 85
## hashtag3 239 212 389 327 203 115
## hashtag4 266 147 304 97 168 110
## hashtag5 313 175 413 178 256 181
## hashtag6 378 214 197 84 236 143
## hashtag7 303 73 259 170 188 204
## hashtag8 449 136 172 26 396 296
pheatmap::pheatmap(table(data.frame(GEX.seur$FB.info,GEX.seur$Anno_ILC2)),
main = "Cell Count",
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f")
pheatmap::pheatmap(100*rowRatio(table(data.frame(GEX.seur$FB.info,GEX.seur$Anno_ILC2))),
main = "Percentage of Sample count(row)",
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.2f")
DimPlot(GEX.seur, reduction = "umap", label = F,group.by = "SP.info",
cols = c("#76797C","#0000C8","#FDB911"))+
DimPlot(GEX.seur, reduction = "umap", label = T)
DimPlot(GEX.seur, group.by = "SP.info", split.by = "SP.info",cols = c("#76797C","#0000C8","#FDB911"), ncol = 3)
table(data.frame(GEX.seur$SP.info,GEX.seur$Anno_ILC2))
## GEX.seur.Anno_ILC2
## GEX.seur.SP.info ILC2_0 ILC2_4 ILC2_1 ILC2_5 ILC2_2 ILC2_3
## PBS 341 114 200 31 224 149
## IL-33 818 534 1106 602 627 406
## IL-33_DA 1130 423 628 280 820 643
pheatmap::pheatmap(table(data.frame(GEX.seur$SP.info,GEX.seur$Anno_ILC2)),
main = "Cell Count",
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.0f")
pheatmap::pheatmap(100*rowRatio(table(data.frame(GEX.seur$SP.info,GEX.seur$Anno_ILC2))),
main = "Percentage of Sample count(row)",
cluster_rows = F, cluster_cols = F, display_numbers = T, number_format = "%.2f")
basically follow https://bitbucket.org/jerry00/mouse_small_intestine_immune_cell_atlas/src/master/script/topicModeling_PP.R
source("../analysis_1101/topicmodel.r")
# using umap coordinates
GEX.seur@reductions[['umap']]@cell.embeddings[1:5,1:2]
## UMAP_1 UMAP_2
## AAACCCAAGCATCCTA-1 -1.1394443 3.0657358
## AAACCCAAGGGCAACT-1 -3.0438967 -0.2278638
## AAACCCAAGGTGCCAA-1 -3.9796829 -1.0369316
## AAACCCACAGAAGTGC-1 -0.7091646 -1.2534755
## AAACCCAGTAGGGTAC-1 4.3127932 1.5042106
# to exclude ribosomal
ribo.loc <- grepl("Rpl|Rps", rownames(GEX.seur@assays[['RNA']]@counts))
head(rownames(GEX.seur@assays[['RNA']]@counts)[ribo.loc])
## [1] "Rpl7" "Rpl31" "Rpl37a" "Rps6kc1" "Rpl7a" "Rpl12"
sum(ribo.loc)
## [1] 96
# Build topic models and generate assocaited figures
# Recommended to build models for k = 3,4,5,6,7,8,9,10 and tolerance 0.1 (set as inputs to this function)
# each K would take hours, bigger K, longer time
# so slow, would run into three sub-scripts from 3 to 10
#for(kk in c(3,4,5,6,7,8,9,10)){
# kkk <- kk
# if(kk %in% c(4,6,8)){
# kkk <- paste0(0,kk)
# }
# FitGoM(t(as.matrix(GEX.seur@assays[['RNA']]@counts)[!ribo.loc,]),
# K = kk, tol = 0.1,
# path_rda =paste0("topicmodel/test_topic.n",kkk,"_t0.1.rda"))
#}